#coding:gbk;
import pandas as pd
import numpy as np

#begin: change to current dir
from os import path,chdir,getcwd,system
from sys import argv
# Make the process run from the folder given in argv[0] (the script path) so
# that the relative CSV file names used below resolve next to the script.
# When argv[0] carries no directory part, stay in the current directory.
script_dir = path.dirname(argv[0])
cwd = script_dir or getcwd()
chdir(cwd)
del script_dir  # temporary only; keep the module namespace tidy
#end:  change to current dir
import Levenshtein as lv
# Load the input, discard every row that contains a missing value and
# renumber the remaining rows from 0 so labels and positions coincide.
df = pd.read_csv("abstracts.csv").dropna().reset_index(drop=True)

# One 'vs<k>' column per row k.  Cells start at -1; only the cells with
# k greater than the row label are overwritten below, so -1 marks the
# pairs that are never computed.
distance_columns = ['vs%d' % label for label in df.index]
df[distance_columns] = -1

# Rows are labelled 0..max, so max + 1 is the exclusive upper bound for the
# "later row" loops.
row_count = df.index.max() + 1

# Fill the upper triangle: distance between each row's abstract and the
# abstract of every later row.
for row in df.index:
    for other in range(row + 1, row_count):
        first_text = df.loc[row, 'abstracts']
        second_text = df.loc[other, 'abstracts']
        df.loc[row, 'vs%d' % other] = lv.distance(first_text, second_text)

# Keep the full distance table for inspection.
df.to_csv('abstracts_lv.csv')

# A row is marked for removal when its distance to some later row is below
# 40; a row is therefore kept only if no later row is that close to it.
# The same label may be collected several times (once per close later row).
rows_to_drop = []
for row in df.index:
    for other in range(row + 1, row_count):
        pair_distance = df.loc[row, 'vs%d' % other]
        if pair_distance < 40:  #
            rows_to_drop.append(row)

# NOTE: the 'vs<k>' columns stay in the output and still refer to the row
# numbering used before this drop/renumber step.
df = df.drop(index=rows_to_drop).reset_index(drop=True)
df.to_csv('abstracts_lv_clear.csv')

